# Summary: This script replicates the article's main analysis

#####################################################
#----------Replicate Main Paper Results ------------#
#####################################################

options(scipen=999)
rm(list = ls())

# Load libraries
library(tidyverse)
library(lmtest)
library(sandwich)
library(margins)
library(DescTools)
library(insight)

# Set working directory
# setwd('~/Dataverse/')

# Load datasets
load(file = "./Data/df.RData")
load(file = "./Data/df_long.RData")

#############################################
# The Race-Gender Gap in Political Ambition #
#############################################

# FIGURE 1 #

m1 = glm(considered_running ~ racegender +
           political_interest +
           n_offices_qualified +
           partyID +
           encouraged +
           AGE7 +
           education +
           income +
           married, 
         data = df,
         family = binomial)

m1_robust = coeftest(m1, vcov. = vcovHC(m1, type="HC1"))

# Marginal Effects

Fig1_AME = summary(margins(m1, vcov = sandwich::vcovHC(m1, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

Fig1_AME$factor = gsub("racegender","",Fig1_AME$factor)

Fig1_AME = Fig1_AME %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")))

# Predicted Probabilities

m1_data = m1[["model"]]

Fig1_Pred = expand_grid(racegender = unique(m1_data$racegender)) %>%
  mutate(political_interest = mean(m1_data$political_interest),
         n_offices_qualified = mean(m1_data$n_offices_qualified),
         encouraged = Mode(m1_data$encouraged),
         partyID = Mode(m1_data$partyID),
         AGE7 = mean(m1_data$AGE7),
         education = mean(m1_data$education),
         income = mean(m1_data$income),
         married = Mode(m1_data$married),
         considered_running = 1)

n_pred = nrow(Fig1_Pred)

Fig1_Pred = rbind.data.frame(Fig1_Pred, m1_data[(n_pred+1):nrow(m1_data),])

predictions = as.data.frame(get_predicted(m1, newdata = Fig1_Pred, vcov =  vcovHC(m1, type = "HC1"),
                                          ci = 0.95))

Fig1_Pred = Fig1_Pred[1:n_pred,] %>%
  mutate(predicted = predictions$Predicted[1:n_pred],
         ci_low = predictions$CI_low[1:n_pred],
         ci_high = predictions$CI_high[1:n_pred])

# Create Figure:

m1_ame = Fig1_AME[,c("racegender","AME","lower","upper")]
m1_ame$quantity = "Avg. Marginal Effect"

m1_pred = Fig1_Pred[,c("racegender","predicted","ci_low","ci_high")]
m1_pred$quantity = "Predicted Probabilities"

cols = c("racegender","estimate","ci_low","ci_high","quantity")
names(m1_ame) = cols
names(m1_pred) = cols

m1_combined_plot = rbind.data.frame(m1_ame, m1_pred)

m1_combined_plot$racegender = factor(m1_combined_plot$racegender, levels = c("White Men","Black Men","Hispanic Men",
                                                                             "Black Women","Hispanic Women","White Women"
))

m1_combined_plot = m1_combined_plot %>%
  mutate(hline = ifelse(quantity == "Avg. Marginal Effect", 0, NA))

ggplot(m1_combined_plot, aes(x = racegender, y = estimate)) + 
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        text = element_text(size = 25)) +
  xlab("") + 
  ylab("") +
  facet_wrap(~ quantity, ncol = 1, scales = "free_y") + 
  geom_hline(aes(yintercept = hline), linetype = "dotted", color = "red", linewidth = 1, na.rm = TRUE) +
  coord_flip()

ggsave(file=("./MainPaperResults/figure1.png"), width = 20, height = 20, units = "cm", dpi = 600)


###############################################
# The Race-Gender Ambition Gap Across Offices #
###############################################

# FIGURE 2 #

m2 = glm(considered_running ~ racegender + office +
           racegender*office +
           political_interest +
           qualified_office +
           partyID +
           encouraged +
           AGE7 +
           education +
           income +
           married, 
         data = df_long,
         family = binomial)

m2_robust = coeftest(m2, vcov. = vcovCL(m2, vcov = vcovCL, cluster = ~CaseId))

# Marginal Effects

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

Fig2_AME = summary(margins(m2, at = list(office = c("School Board","City Council","Mayor",
                                                         "State Legislature","Governor",
                                                         "US House","US Senate","President")), vcov = sandwich::vcovCL(m2, vcov = vcovCL, cluster = ~CaseId))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

Fig2_AME$factor = gsub("racegender","",Fig2_AME$factor)

Fig2_AME = Fig2_AME %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")))

# Predicted Probabilities

m2_data = m2[["model"]]

Fig2_Pred = expand_grid(racegender = unique(m2_data$racegender), office = unique(m2_data$office)) %>%
  mutate(political_interest = mean(m2_data$political_interest),
         qualified_office = Mode(m2_data$qualified_office),
         encouraged = Mode(m2_data$encouraged),
         partyID = Mode(m2_data$partyID),
         AGE7 = mean(m2_data$AGE7),
         education = mean(m2_data$education),
         income = mean(m2_data$income),
         married = Mode(m2_data$married),
         considered_running = 1)

n_pred = nrow(Fig2_Pred)

Fig2_Pred = rbind.data.frame(Fig2_Pred, m2_data[(n_pred+1):nrow(m2_data),])

predictions = as.data.frame(get_predicted(m2, newdata = Fig2_Pred, vcov =  vcovCL(m2,  cluster = ~CaseId),
                                          ci = 0.95))

Fig2_Pred = Fig2_Pred[1:n_pred,] %>%
  mutate(predicted = predictions$Predicted[1:n_pred],
         ci_low = predictions$CI_low[1:n_pred],
         ci_high = predictions$CI_high[1:n_pred])

Fig2_Pred$racegender = factor(Fig2_Pred$racegender, levels = c("White Men","Black Men","Hispanic Men",
                                                                             "Black Women","Hispanic Women","White Women"
))

Fig2_Pred$office = factor(Fig2_Pred$office, levels = c("School Board","City Council","Mayor",
                                                                     "State Legislature","Governor","US House","US Senate","President"
))

# Create Figure:

m2_ame = Fig2_AME[,c("racegender","office","AME","lower","upper")]
m2_ame$quantity = "Avg. Marginal Effect"

m2_pred = Fig2_Pred[,c("racegender","office","predicted","ci_low","ci_high")]
m2_pred$quantity = "Predicted Probabilities"

cols = c("racegender","office","estimate","ci_low","ci_high","quantity")
names(m2_ame) = cols
names(m2_pred) = cols

m2_combined_plot = rbind.data.frame(m2_ame, m2_pred)

m2_combined_plot$racegender = factor(m2_combined_plot$racegender, levels = c("White Men","Black Men","Hispanic Men",
                                                                             "White Women","Black Women","Hispanic Women"))

m2_combined_plot$office = factor(m2_combined_plot$office, levels = c("School Board","City Council","Mayor",
                                                                     "State Legislature","Governor","US House","US Senate","President"
))


m2_combined_plot = m2_combined_plot %>%
  mutate(hline = ifelse(quantity == "Avg. Marginal Effect", 0, NA))

ggplot(m2_combined_plot, aes(x = office, y = estimate, group = racegender, linetype = racegender, color = racegender)) + 
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values = c("solid", "solid", "solid", "longdash", "longdash", "longdash")) +
  scale_color_manual("Condition", values = c("black", "gray40", "gray65", "red", "red4", "orange3")) +
  theme_bw() +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank()) +
  xlab("") + 
  ylab("") +
  theme(text = element_text(size = 25)) +
  facet_wrap(~ quantity, ncol = 1, scales = "free_y") +
  geom_hline(aes(yintercept = hline), linetype = "dotted", color = "red", linewidth = 1, na.rm = TRUE)

ggsave(file=("./MainPaperResults/figure2.png"), width = 45, height = 26, units = "cm", dpi = 600)


#############################
# The Role of Encouragement #
#############################

# FIGURE 3 #

m3 = glm(considered_running ~ racegender + encouraged +
           racegender*encouraged +
           political_interest +
           n_offices_qualified +
           partyID +
           AGE7 +
           education +
           income +
           married, 
         data = df,
         family = binomial)

m3_robust = coeftest(m3, vcov. = vcovHC(m3, type="HC1"))

# Marginal Effects

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

Fig3_AME = summary(margins(m3, at = list(encouraged = c("None","Non-Political","Political","Both")), vcov = sandwich::vcovHC(m3, type="HC1"))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

Fig3_AME$factor = gsub("racegender","",Fig3_AME$factor)

Fig3_AME = Fig3_AME %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")))

# Predicted Probabilities

m3_data = m3[["model"]]

Fig3_Pred = expand_grid(encouraged = unique(m3_data$encouraged), racegender = unique(m3_data$racegender)) %>%
  mutate(political_interest = mean(m3_data$political_interest),
         n_offices_qualified = mean(m3_data$n_offices_qualified),
         partyID = Mode(m3_data$partyID),
         AGE7 = mean(m3_data$AGE7),
         education = mean(m3_data$education),
         income = mean(m3_data$income),
         married = Mode(m3_data$married),
         considered_running = 1)

n_pred = nrow(Fig3_Pred)

Fig3_Pred = rbind.data.frame(Fig3_Pred, m3_data[(n_pred+1):nrow(m3_data),])

predictions = as.data.frame(get_predicted(m3, newdata = Fig3_Pred, vcov =  vcovHC(m3, type = "HC1"),
                                          ci = 0.95))

Fig3_Pred = Fig3_Pred[1:n_pred,] %>%
  mutate(predicted = predictions$Predicted[1:n_pred],
         ci_low = predictions$CI_low[1:n_pred],
         ci_high = predictions$CI_high[1:n_pred])

# Create Figure:

m3_ame = Fig3_AME[,c("racegender","encouraged","AME","lower","upper")]
m3_ame$quantity = "Avg. Marginal Effect"

m3_pred = Fig3_Pred[,c("racegender","encouraged","predicted","ci_low","ci_high")]
m3_pred$quantity = "Predicted Probabilities"

cols = c("racegender","encouraged","estimate","ci_low","ci_high","quantity")
names(m3_ame) = cols
names(m3_pred) = cols

m3_combined_plot = rbind.data.frame(m3_ame, m3_pred)

m3_combined_plot$racegender = factor(m3_combined_plot$racegender, levels = c("White Men","Black Men","Hispanic Men",
                                                                             "White Women","Black Women","Hispanic Women"))

m3_combined_plot$encouraged = factor(m3_combined_plot$encouraged, levels = c("None","Non-Political","Political","Both"))


m3_combined_plot = m3_combined_plot %>%
  mutate(hline = ifelse(quantity == "Avg. Marginal Effect", 0, NA))

# Plot
ggplot(m3_combined_plot, aes(x = encouraged, y = estimate, group = racegender, linetype = racegender, color = racegender)) + 
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values = c("solid", "solid", "solid", "longdash", "longdash", "longdash")) +
  scale_color_manual("Condition", values = c("black", "gray40", "gray65", "red", "red4", "orange3")) +
  theme_bw() +
  theme(legend.position = "bottom") +
  theme(legend.title = element_blank()) +
  xlab("") + 
  ylab("") +
  theme(text = element_text(size = 25)) +
  facet_wrap(~ quantity, ncol = 1, scales = "free_y") +
  geom_hline(aes(yintercept = hline), linetype = "dotted", color = "red", linewidth = 1, na.rm = TRUE)

ggsave(file=("./MainPaperResults/figure3.png"), width = 37, height = 24, units = "cm", dpi = 600)

